filtered_data <- read.csv("../data/data-2023-09-11 (2).csv", header = TRUE)
selected_stats <- c("Ho", "Hs", "Ht", "Fis (W&C)", "Fst (W&C)", "Fis (Nei)", "Fst (Nei)"
)
n_rep=10
n_pop = 8
sequence_length <- length(6:11)
library(hierfstat)
library(adegenet)
filtered_data <- data.frame(indv = paste(substr(filtered_data$Population,1,3), row.names(filtered_data), sep="."), filtered_data)
# Create mydata_genind
population <- filtered_data$Population
mydata_genind <- df2genind(
X = filtered_data[,6:11],
sep = "/",
ncode = 6,
ind.names = filtered_data$indv,
pop = filtered_data$Population,
NA.char = "0/0",
ploidy = 2,
type = "codom",
strata = NULL,
hierarchy = NULL
)
mydata_genind
/// GENIND OBJECT /////////
// 698 individuals; 6 loci; 48 alleles; size: 195 Kb
// Basic content
@tab: 698 x 48 matrix of allele counts
@loc.n.all: number of alleles per locus (range: 5-12)
@loc.fac: locus factor for the 48 columns of @tab
@all.names: list of allele names for each locus
@ploidy: ploidy of each individual (range: 2-2)
@type: codom
@call: df2genind(X = filtered_data[, 6:11], sep = "/", ncode = 6, ind.names = filtered_data$indv,
pop = filtered_data$Population, NA.char = "0/0", ploidy = 2,
type = "codom", strata = NULL, hierarchy = NULL)
// Optional content
@pop: population of each individual (group size range: 24-166)
mydata_hierfstat <- genind2hierfstat(mydata_genind)
library(pegas)
Loading required package: ape
Attaching package: ‘ape’
The following objects are masked from ‘package:hierfstat’:
pcoa, varcomp
Registered S3 method overwritten by 'pegas':
method from
print.amova ade4
Attaching package: ‘pegas’
The following object is masked from ‘package:ape’:
mst
The following object is masked from ‘package:ade4’:
amova
library(dplyr)
Attaching package: ‘dplyr’
The following object is masked from ‘package:ape’:
where
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(tibble)
result <- basic.stats(mydata_hierfstat)
df_resutl_basic<-as.data.frame(result$perloc)
# Weir and Cockrham estimates of Fstatistics - FIS and FST
result_f_stats <- Fst(as.loci(mydata_genind), na.alleles = "")
result_f_stats <- result_f_stats[,2:3]
colnames(result_f_stats) <- c("Fst (W&C)", "Fis (W&C)")
result_f_stats <- merge(result_f_stats, df_resutl_basic, by="row.names",all.x=TRUE)
colnames(result_f_stats)[10] <- "Fst (Nei)"
colnames(result_f_stats)[12] <- "Fis (Nei)"
result_f_stats <- result_f_stats %>% column_to_rownames(., var = 'Row.names')
result_f_stats_selec <- result_f_stats %>% select(all_of(selected_stats))
result_f_stats_selec
library(hrbrthemes)
library(ggplot2)
# compute the Gstats
result_f_stats <- result_f_stats %>% mutate(GST = 1-Hs/Ht)
result_f_stats <- result_f_stats %>% mutate("GST''" = (n_pop*(Ht-Hs))/((n_pop*Hs-Ht)*(1-Hs)))
result_f_stats
# Plot with linear trend
p2 <- ggplot(result_f_stats, aes(x=GST, y=Hs)) +
geom_point() +
geom_smooth(method=lm , color="red", se=FALSE) +
theme_ipsum()
p2
# Libraries
library("poppr")
This is poppr version 2.9.4. To get started, type package?poppr
OMP parallel support: available
library("heatmaply")
Loading required package: plotly
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
Loading required package: viridis
Loading required package: viridisLite
Registered S3 method overwritten by 'dendextend':
method from
rev.hclust vegan
Registered S3 method overwritten by 'seriation':
method from
reorder.hclust vegan
======================
Welcome to heatmaply version 1.5.0
Type citation('heatmaply') for how to cite the package.
Type ?heatmaply for the main documentation.
The github page is: https://github.com/talgalili/heatmaply/
Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
You may ask questions at stackoverflow, use the r and heatmaply tags:
https://stackoverflow.com/questions/tagged/heatmaply
======================
missing_data <- info_table(mydata_genind, plot = FALSE)
# Matrix format
mat <- as.matrix(missing_data)
# heatmap
p <- heatmaply(mat,
dendrogram = "none",
xlab = "", ylab = "",
main = "",
scale = "column",
margins = c(60,100,40,20),
grid_color = "white",
grid_width = 0.00001,
titleX = FALSE,
hide_colorbar = TRUE,
branches_lwd = 0.1,
label_names = c("Population", "Marker", "Value"),
fontsize_row = 8, fontsize_col = 8,
labCol = colnames(mat),
labRow = rownames(mat),
heatmap_layers = theme(axis.line=element_blank())
)
p
library("pegas")
hw.test(as.loci(mydata_genind), B = 1000)
chi^2 df Pr(chi^2 >) Pr.exact
B12 693.49866 15 0.0000000 0.000
C07 27.78220 28 0.4760334 0.109
D12 799.33270 66 0.0000000 0.000
D10 492.53039 28 0.0000000 0.000
A12 11.89948 10 0.2918403 0.207
C03 56.70154 36 0.0153672 0.095
#li
library(radiator)
#https://thierrygosselin.github.io/radiator/reference/genomic_converter.html
#https://thierrygosselin.github.io/radiator/articles/rad_genomics_computer_setup.html
mydata1 <- genomic_converter(mydata_genind, parallel.core = parallel::detectCores() - 1, output = "genepop", filename = "../data/mydata.genepop.txt")
library(genepop)
# https://cran.r-project.org/web/packages/genepop/genepop.pdf
genepop_HW <- test_HW(
inputFile = "../data/mydata.genepop.txt",
which = "Proba",
outputFile = "",
settingsFile = "",
enumeration = FALSE,
dememorization = 10000,
batches = 20,
iterations = 5000,
verbose = interactive()
)
I have found different values from Fstats.
The index of association was originally developed by A.H.D. Brown analyzing population structure of wild barley (Brown, 1980).
Ia: The index of association. p.Ia: The p-value resulting from a one-sided permutation test based on the number of samples indicated in the original call. rbard: The standardized index of association. p.rd: The p-value resulting from a one-sided permutation test based on the number of samples indicated in the original call.
pair.ia calculates the index of association in a pairwise manner among all loci.
loci_pair <- pair.ia(mydata_genind, sample = 1000, quiet = FALSE, method = 1,plot = FALSE)
|
| | 0%
|
| | 0%
|
|== | 2%
|
|==== | 4%
library(genepop)
test_LD(
inputFile = "../data/mydata.genepop.txt",
outputFile = "",
settingsFile = "",
dememorization = 10000,
batches = 100,
iterations = 5000,
verbose = interactive()
)
LD2 is based on the observed frequencies of different genotypes (Schaid 2004).
print(linkage_pegas$T2)
T2 df P-val
69.43451606 48.00000000 0.02313167
We thus accept the null hypothesis of no linkage among markers (P-val = 0.02313167)
DEVELOPMENT
not ready for deployment
#shuffled_matrices <- replicate(n_rep, mat[sample(nrow(mat)), ], simplify = FALSE)
shuffled_matrices <- replicate(n_rep, mat[sample(length(mat), replace = FALSE)], simplify = FALSE)
##################
# shuffle only the genotype and add the pop column later for each matrices.
#in the loop?
###############
# Create a list to store the wc
fst_df <- numeric(sequence_length)
fis_df <- numeric(sequence_length)
# Calculate the average for each shuffled matrix
# Iterate through the shuffled matrices
for (i in 1:n_rep) {
# Calculate the statistics for the i-th matrix
#HERE THE COLUMN POP
merged_df <- cbind(level_pop, shuffled_matrices[[i]])
result_f_stats <- wc(shuffled_matrices[[i]])
result_f_stats <- as.data.frame(result_f_stats$per.loc)
# Extract FST and FIS values
fst_values <- result_f_stats$FST
fis_values <- result_f_stats$FIS
print( fst_values)
# Assign values to the data frames
fst_df <- cbind(fst_df, result_f_stats$FST)
fis_df <- cbind(fis_df, result_f_stats$FIS)
}
# Set row names as in result_f_stats
rownames(fst_df) <- rownames(fis_df) <- rownames(result_f_stats)
result_FST <- fst_df[, -1]
fis_df <-fis_df[, -1]
vec <- seq(1, n_rep)
colnames(result_FST) <- colnames(fis_df) <- vec
result_FST[1,]
count (result_f_stats[,1][1] > result_FST[1,] )
count <- sum(result_f_stats[,1][1] > result_FST[1, ])
# Initialize an empty data frame to store the counts
count_df <- data.frame(
Greater = numeric(length(result_FST)),
Smaller = numeric(length(result_FST))
)
# Compare the values in result_f_stats[1] to result_FST for each column
for (col in colnames(result_FST)) {
greater_count <- sum(result_f_stats[1] > result_FST[, col])
smaller_count <- sum(result_f_stats[1] < result_FST[, col])
count_df$Greater[col] <- greater_count
count_df$Smaller[col] <- smaller_count
}
# Print the count data frame
print(count_df)
######################## ########################
library(radiator)
genomic_converter(
data,
strata = NULL,
output = NULL,
filename = NULL,
parallel.core = parallel::detectCores() - 1,
verbose = TRUE,
...
)